## ----------------------------------------------------------------------
##
## Regress census outcomes on raid damages
## Note:
##
## ----------------------------------------------------------------------

## environment setting
Sys.setenv(LANGUAGE="en")
gc();gc()
rm(list = ls())
options(scipen = 999)   ## Disable scientific notation

## ----------------------------------------------------------------------
##
## Initial settings
##
## ----------------------------------------------------------------------
## Set directory
## ----------------------------------------------------------------------
root_dir = "YOUR_DIRECTORY/Replication_Folder"
## Load packages and functions
source(file.path(root_dir, "2_Code/1_PackagesFunctions.R"))

## ----------------------------------------------------------------------
## Data directory
data_dir = root_dir	%>%	file.path("1_Data")	%>%	dir_create()
## Output directory
output_dir = root_dir	%>%	file.path("3_Result")	%>%	dir_create()
figure_dir = root_dir	%>%	file.path("4_Figures")	%>%	dir_create()
## Working directory
working_dir = root_dir	%>%	file.path("5_Working")	%>%	dir_create()
## ----------------------------------------------------------------------


## ----------------------------------------------------------------------
##
## Prepare variables
##
## ----------------------------------------------------------------------
## Dependent variables
## ----------------------------------------------------------------------
## Logged DVs
depvar_vec = c(
  "lnr_unemp", "lnrm_unemp", "lnrf_unemp",
  "lnr_proexe", "lnave_lvlength", "lnave_eduyr"
)
## ----------------------------------------------------------------------
## Logged negative controls
negative_cntrlz = c("lnrmale0004")
## ----------------------------------------------------------------------
## Dependent variable vector (all DVs and negative controls combined): 22 DVs
depvar_vec_combined = c(depvar_vec, negative_cntrlz)
## Dependent variables: for alternative treatment indicators
depvar_vec_alternative_treatment = c(depvar_vec, negative_cntrlz)

## ----------------------------------------------------------------------
## Treatment variables
## ----------------------------------------------------------------------
treatment_vec = c("ratio_damage", "ln_damage_ratio", "binary_damage_ratio")
robust_treatment_vec = c(treatment_vec[2:3], str_c("as.factor(", treatment_vec[1],")"))
## ----------------------------------------------------------------------
## Pretreatment covariates
## ----------------------------------------------------------------------
## Distance variables
distance_termz_base = c("palace_dist_ln", "minTargetDistance_ln")
## Polynomial
polynomial_degree = 5
# distance_termz_poly = polynomial_varname(distance_termz_base, degree = polynomial_degree)
distance_termz_poly = str_c("poly(", distance_termz_base, ", degree = ", polynomial_degree, ", simple = TRUE, raw = TRUE)")
distance_term_specification = list( distance_termz_poly, str_c("s(", distance_termz_base, ")") )
## Geographical features
spatial_covariates = c(
  ## Residential ratio and geographical area
  "ratio_residential", "poly_area_ln", "n_neighbor",
  ## Prewar population density
  "PopDensity_1939_km2_ln",
  ## Terrain variables
  "mean_elevation_ln", "mean_slope_ln", "river_distance_ln",
  ## Railway
  "railway_length_ln", "n_stations", "SpLag_railway_length_ln", "SpLag_n_stations"
)
## Flammability score
flame_scorez = c("hazard", "max_nghb_hazard")
## ----------------------------------------------------------------------
## Spatial spline/polynomials and fixed effects
## ----------------------------------------------------------------------
## District FEs
district_fe = "as.factor(prewar_district)"	## 35-district FE
## Spatial terms
spatial_term_lbl = c("Polynomial", "GAM")	## This indicates polynomial or spline specification in the csv files
lonlat_base = c("z_lon", "z_lat")
spatial_polynomial = str_c("poly(z_lon, z_lat, degree = ",  polynomial_degree, ", simple = TRUE, raw = TRUE)")
spatial_term_specification = list(spatial_polynomial, "te(longitude, latitude)")
## ----------------------------------------------------------------------



## ----------------------------------------------------------------------
##
## Prepare models
##
## ----------------------------------------------------------------------
## 1. Polynomial model
base_polynomial = chr2fml("outcome",
                          idv_list = list(
                            treatment_vec[1], spatial_covariates, district_fe,
                            distance_term_specification[[1]], spatial_term_specification[[1]]
                          )
)
## ----------------------------------------------------------------------
## 2. GAM with additional spline terms
base_gam = chr2fml("outcome",
                   idv_list = list(
                     treatment_vec[1], spatial_covariates, district_fe,
                     distance_term_specification[[2]], spatial_term_specification[[2]]
                   )
)
## ----------------------------------------------------------------------
## 3. Factor raid
factor_polynomial = base_polynomial	%>%	update(str_c(". ~ . -", treatment_vec[1], "+ as.factor(", treatment_vec[1], ")"))
## ----------------------------------------------------------------------
## Model list obj
model_list = list(base_polynomial, base_gam)
model_lbl = c("Polynomial", "GAM")
## ----------------------------------------------------------------------


## ----------------------------------------------------------------------
##
## Prepare data objects
##
## ----------------------------------------------------------------------
## Read data
tokyo2keep = data_dir	%>%
  file.path("census_data4.csv") %>%
  read_csv(col_types = cols()) %>%
  mutate(group = "FullSample")


## ----------------------------------------------------------------------
## Nested data
## --- Full sample
full_sample = tokyo2keep %>% group_by(year, group) %>% nest()
## --- Residential ratio subsamples
resid_nested = tokyo2keep	%>%	mutate(group = str_c("Resid_", ResidRatioSubsampleLbl))	%>%
  group_by(year, group)	%>%	nest()
## --- 10km subsamples
distance_nested = tokyo2keep	%>%
  mutate(
    group = str_c("Dist_", PalaceDistanceSubsampleLbl)
  )	%>%
  group_by(year, group)	%>%	nest()
## --- Subsamples with and without prewar hazard rating
hazard_nested = tokyo2keep	%>%
  mutate(group = str_c("HazardAvailable_", HazardAvailable))	%>%
  group_by(year, group)	%>%	nest()
## ----------------------------------------------------------------------
## Combined and nested data
tokyo_nested = resid_nested	%>%
  bind_rows(hazard_nested)	%>%
  bind_rows(distance_nested)	%>%
  bind_rows(full_sample)	%>%
  select(year, group, data)
## ----------------------------------------------------------------------
## Subsample vector
subsample_vec = tokyo_nested	%>%	pull(group)	%>%	unique()	## Subsample indicators
subsample_vec
## ----------------------------------------------------------------------
## Census year loop
vec_year = seq(1995, 2015, by = 5)	## Census years
## ----------------------------------------------------------------------
## District vector: Unique (35) districts
vec_district = tokyo2keep$prewar_district	%>%	unique()
## ----------------------------------------------------------------------



## ----------------------------------------------------------------------
##
## Prepare output matrix
##
## ----------------------------------------------------------------------
## Output matrix # columns
ncol_tmp_mtrx = 8

## create empty output matrix
tmp_mtrx0 = matrix(NA, nrow = 30, ncol = ncol_tmp_mtrx)

## assing names to columns
colnames(tmp_mtrx0)[1:ncol_tmp_mtrx] = c("DepVar", 
                                        "year", 
                                        "Sample", 
                                        "tau",
                                        "se", 
                                        "ci_lower", 
                                        "ci_upper", 
                                        "N_obs")

## ----------------------------------------------------------------------
##
## Prepare graph parameters --- All DVs by year (annual subsamples), w/o 6F+ building
##
## ----------------------------------------------------------------------
## Figure parameters
fig_height = 4.25
base_cex = .85
fig_mrgn = c(3.25, 2.4, 1.65, 1) + 0.1    ## figure margin: bottom, left, top, right
plot_col = c("lightsteelblue4", "lightsteelblue1", "magenta2")
pt_cex = .6
polygon_alpha = 0.25
x_tick = 1:30
year_lbl = "Census Year"
y_tick_lim = c(-4, 4)
y_range = c(-0.15, 0.15)
y_tick = seq(y_range[1], y_range[2], by = 0.05) %>% round(2)
y_tick_sub = seq(-1, 1, by = 0.01) %>% round(2)
y_tick_subsub = seq(-1, 1, by = 0.01)
separation_linez = c(5, 10, 15, 20, 23, 25) + 0.5
## ----------------------------------------------------------------------
## Outcome labels
outcome_pos = c(3, 8, 13, 18, 22, 24.5, 28)
outcome_pos_1 = c(1, 3, 5, 7)
outcome_pos_2 = c(2, 4, 6)
outcome_lbl = c("% Unemp", "% Unemp (Male)", "% Unemp (Female)", "% Prof Exec", "Ave Res Yrs", "Ave Edu Yrs", "% Male <6 y/o")
outcome_lbl = str_c("ln ", outcome_lbl)
## ----------------------------------------------------------------------
## Rectangle (border) width
rect_width = 0.5
rect_lwd = 0.5
point_est_lwd = 1.25
## ----------------------------------------------------------------------
## Colors
colz_vec = c("lightsteelblue4", "lightsteelblue4", "gray65", "magenta2")
alpha_vec = c(0.6, 0.3, 1)
## ----------------------------------------------------------------------




## ----------------------------------------------------------------------
##
## Figure 6
## Result I: Socioeconomic results---GAM
##
## ----------------------------------------------------------------------
## Setting before loop
## ----------------------------------------------------------------------
## Census year loop
vec_year = seq(1995, 2015, by = 5)	## Census years

## District vector: Unique (35) districts
vec_district = tokyo2keep$prewar_district  %>% unique

## initial row number for empty output matrix
r = 1

## create empty output matrix
tmp_mtrx = tmp_mtrx0

## ----------------------------------------------------------------------
## Loop over outcomes (indexed by i)
## ----------------------------------------------------------------------
for (i in seq_along(depvar_vec_combined)) {
  
  ## Set dependent variable and model specification
  tmp_depvar = depvar_vec_combined[i]
  tmp_mod = base_gam %>%	update(str_c(tmp_depvar, " ~ ."))	## Update DV
  
  ## Loop over census years
  for (y in seq_along(vec_year)) {
    
    ## select year
    tmp_year = vec_year[y]
    
    ## Prepare temp data
    tmp_data = tokyo_nested	%>%
      filter(group=="FullSample") %>%
      filter(
        year == tmp_year
      )	%>%
      unnest(cols = c(data))	%>%
      drop_na(all_of(tmp_depvar))	## unnest and drop NAs in DV
    
    ## enter information to the output matrix
    tmp_mtrx[r,1] = i						## Dependent variable indicator
    tmp_mtrx[r,2] = tmp_year			## Census year
    tmp_mtrx[r,3] = "FullSample" ## (Sub)sample indicator
    
    
    ## Do estimation only if the data exist in a given year
    if (nrow(tmp_data) > 0) {
      ## ----------------------------------------------------------------------
      ## Estimation
      tmp_est = gam(tmp_mod, data = tmp_data, weights = tmp_data$pop)
      
      ## Record outputs
      tmp_info = model_info_gam(tmp_est, x2extract = "ratio_damage")
      tmp_mtrx[r,4:8] = c(tmp_info, nrow(tmp_data))	## beta, SE, and CIs + N obs
      ## update row number
      r = r+1
      
    } ## if ends here
  } ## year loop ends here
} ## outcome loop ends here


## format outputs for plot
out_dat = as.data.frame(tmp_mtrx) %>%
  ## set as numeric
  mutate_at(vars(-Sample), as.numeric) %>%
  as_tibble %>%
  mutate(
    DepVar = depvar_vec_combined[as.numeric(DepVar)],	## Dependent variable label
    Predictor = "ratio_damage"	## Add predictor (model specification)
  )	%>%
  drop_na()


## ----------------------------------------------------------------------
## Preparing figure
## ----------------------------------------------------------------------
## Pull out DV names
tmp_depvar_vec = out_dat$DepVar %>% unique

## Prepare estimates
tmp_plot_dat = out_dat	%>%
  mutate(
    point_col = ifelse(tau>0, adjustcolor(colz_vec[1], alpha = alpha_vec[2]), adjustcolor(colz_vec[2], alpha = alpha_vec[2])),
    point_col = ifelse(ci_lower>0 & ci_upper>0, adjustcolor(colz_vec[1], alpha = alpha_vec[1]), point_col),
    point_col = ifelse(ci_lower<0 & ci_upper<0, adjustcolor(colz_vec[2], alpha = alpha_vec[1]), point_col)
  ) %>%
  mutate(row_id = row_number())


## ----------------------------------------------------------------------
## Creating Plot
## ----------------------------------------------------------------------
tmp_figure_path = file.path(figure_dir, "fig_6.pdf")
pdf(tmp_figure_path, width = plt_ratio(fig_height), height = fig_height)
par(cex = base_cex, mar = fig_mrgn, lend = "square")
x_range = c(0.25, (nrow(tmp_plot_dat)+0.75))
plot(x = tmp_plot_dat$row_id, y = tmp_plot_dat$tau, ylim = y_range, axes = FALSE, xlab = NA, ylab = NA, type = "n", yaxs = "i", xaxs = "i", xlim = x_range)

## Background for male ratio
rect(xleft = separation_linez[6], xright = 40, ytop = 1, ybottom = -1, col = adjustcolor("gray", alpha = 0.2))

## Separation lines between DVs
abline(v = separation_linez)
abline(v = 1:nrow(tmp_plot_dat), lwd = 0.35, lty = "dotted", col = colz_vec[3]) ## horizontal guides

## Rectangles
rect(xleft = tmp_plot_dat$row_id-rect_width/2, xright = tmp_plot_dat$row_id+rect_width/2, ybottom = tmp_plot_dat$ci_lower, ytop = tmp_plot_dat$ci_upper, col = tmp_plot_dat$point_col, border = "black", lwd = rect_lwd)
segments(x0 = tmp_plot_dat$row_id-rect_width/2, x1 = tmp_plot_dat$row_id+rect_width/2, y0 = tmp_plot_dat$tau, lwd = point_est_lwd)

## Horizontal axis
axis(1, line = 0, at = 1:nrow(tmp_plot_dat), label = NA, tck = -0.01)
axis(1, line = 0, at = separation_linez, label = NA, tck = -0.115)
mtext(tmp_plot_dat$year, side = 1, line = 0.5, at = 1:nrow(tmp_plot_dat), las = 2, cex = 0.85)
mtext(year_lbl, side = 1, line = 2.45, cex = 0.8)

## Vertical axis
for (j in c(2, 4)) {
  axis(j, line = 0, at = y_tick, label = NA)
  axis(j, line = 0, at = y_tick_sub, label = NA, tck = -0.01)
}
mtext(y_tick, side = 2, line = 0.5, at = y_tick, cex = 0.8)

## Showing outcome variables in rows.
mtext(outcome_lbl[outcome_pos_1], side = 3, line = 0, at = outcome_pos[outcome_pos_1], cex = 0.85)
mtext(outcome_lbl[outcome_pos_2], side = 3, line = 0.85, at = outcome_pos[outcome_pos_2], cex = 0.85)
mtext("Raid Effect (95% CIs)", side = 2, line = 1.65, cex = 0.8)

## Zero-reference
abline(h = 0, col = adjustcolor(colz_vec[4], alpha = alpha_vec[3]))
box()

## Close device
dev.off()
## ----------------------------------------------------------------------




## ----------------------------------------------------------------------
##
## Figure 7
## Result I: Socioeconomic results---factorized damage 2010
##
## ----------------------------------------------------------------------
## Setting before loop
## ----------------------------------------------------------------------
## District vector: Unique (35) districts
vec_district = tokyo2keep$prewar_district  %>% unique

## initial row number for empty output matrix
r = 1
m = 2 #GAM

## ----------------------------------------------------------------------
## Loop over outcomes (indexed by i)
## ----------------------------------------------------------------------
for (i in seq_along(depvar_vec_alternative_treatment)) { ## Loop over outcomes
  ## Set dependent variable
  tmp_depvar = depvar_vec_alternative_treatment[i]
  ## ----------------------------------------------------------------------
  ## Loop over census years
  ## Census year
  tmp_year = 2010
  ## Output matrix
  tmp_mtrx = matrix(NA, nrow = 10, ncol = ncol_tmp_mtrx)
  tmp_mtrx[,1] = i
  tmp_mtrx[,2] = tmp_year
  tmp_mtrx[,3] = rep(3, 10)
  ## ----------------------------------------------------------------------
  ## Loop over treatment specifications
  ## Print progress
  cat(str_c("Spatial specification: ", spatial_term_lbl[m], " --- dependent variable: ", i, "/", length(depvar_vec_alternative_treatment), "\r")); flush.console()
  ## ----------------------------------------------------------------------
  ## Pull out subset
  tmp_data = tokyo_nested	%>%
    filter(group=="FullSample") %>%
    filter(year == tmp_year) %>%
    unnest(cols = c(data)) %>%
    drop_na(all_of(tmp_depvar))
  ## Treatment setting
  tmp_treatment = "as.factor(ratio_damage)"
  ## Set model specification
  tmp_mod = model_list[[m]]   %>% update(str_c(". ~ . - ratio_damage + ", tmp_treatment)) %>%
    update(str_c(tmp_depvar, " ~ ."))
  ## Estimate and pull out coef and SEs
  if (nrow(tmp_data) > 0) {
    ## GAM
    tmp_est = gam(tmp_mod, data = tmp_data, weights = tmp_data$pop)
    tmp_info = model_info_gam(tmp_est, x2extract = tmp_treatment)
    ## Store estimates
    tmp_mtrx[1:10,4:8] = cbind(tmp_info, nrow(tmp_data))	## beta, SE, and CIs + N obs
  }
  ## ----------------------------------------------------------------------
  ## Combine output matrix
  if (i == 1) {
    out_mtrx = tmp_mtrx
  }	else	{
    out_mtrx = rbind(out_mtrx, tmp_mtrx)
  }
}	## Treatment loop ends here


## ----------------------------------------------------------------------
## Modify outputs
colnames(out_mtrx) = c("DepVar", "year", "Treatment", "tau", "se", "ci_lower", "ci_upper", "N_obs")
out_dat = out_mtrx	%>%
  as_tibble	%>%
  mutate(
    DepVar = depvar_vec_alternative_treatment[DepVar],
    Treatment = robust_treatment_vec[Treatment],
    Predictor = as.character(tmp_mod[3])
  )	%>%
  drop_na()	


## ----------------------------------------------------------------------
## Preparing figure
## ----------------------------------------------------------------------
## Figure parameters
outcome_lbl_wo6f = outcome_lbl[!str_detect(outcome_lbl, "Apartment")]
tmp_fig_height = 7
tmp_fig_mrgn = c(7.75, 2.5, 2.25, .65) + 0.1    ## figure margin: bottom, left, top, right
separation_linez_2010 = seq(10, 40, by = 10) + 0.5
year_pos = seq(10, 50, by = 10) - 4.5
## Read estimates
out_dat_master = out_dat
## Pull out DV (names)
tmp_depvar_vec = out_dat_master$DepVar %>% unique

## ----------------------------------------------------------------------
## Summary plot for each census years
## ----------------------------------------------------------------------
# tmp_depvar_vec = tmp_depvar_vec[!str_detect(tmp_depvar_vec, "lnrm_unemp|lnrf_unemp")]
tmp_depvar_mat = tibble(DepVar = tmp_depvar_vec, FigLabel = outcome_lbl_wo6f[1:7])  %>%
  ## Drop male/female unemployment rates
  filter(
    tmp_depvar_vec != "lnrm_unemp",
    tmp_depvar_vec != "lnrf_unemp"
  )
## Pull out estimates
out_dat = out_dat_master   %>%
  filter(str_detect(Treatment, "as.factor"))  %>%
  drop_na %>%
  mutate(
    Treatment = str_c(Treatment, rep(seq(10, 100, by = 10), times = 7)),
    Treatment = str_replace_all(Treatment, "as.factor\\(ratio_damage\\)", "RaidDamageDummy_")
  ) %>%
  filter(
    year == 2010,
    DepVar %in% tmp_depvar_mat$DepVar
  )	%>% drop_na %>%
  mutate(
    row_id = row_number(),
    point_col = ifelse(tau>0, adjustcolor(colz_vec[1], alpha = alpha_vec[2]), adjustcolor(colz_vec[2], alpha = alpha_vec[2])),
    point_col = ifelse(ci_lower > 0 & ci_upper > 0, adjustcolor(colz_vec[1], alpha = alpha_vec[1]), point_col),
    point_col = ifelse(ci_lower < 0 & ci_upper < 0, adjustcolor(colz_vec[2], alpha = alpha_vec[1]), point_col),
    ## Dummy labels for figure
    dummy_label = str_replace_all(Treatment, "RaidDamageDummy_", "Raid Damage ") %>% str_c("%")
  )   %>%
  left_join(tmp_depvar_mat)

## ----------------------------------------------------------------------
## Creating Plot
## ----------------------------------------------------------------------
tmp_figure_path = file.path(figure_dir, "fig_7.pdf")
pdf(tmp_figure_path, width = plt_ratio(tmp_fig_height), height = tmp_fig_height)
# dev.new(width = plt_ratio(tmp_fig_height), height = tmp_fig_height)
x_range = c(0.25, (nrow(out_dat)+0.75))
y_range_tmp = c(-0.15, 0.16)
par(cex = base_cex, mar = tmp_fig_mrgn, lend = "square")
plot(out_dat$tau, y = out_dat$row_id, ylim = y_range_tmp, axes = FALSE, xlab = NA, ylab = NA, type = "n", yaxs = "i", xaxs = "i", xlim = x_range)
## Separation lines between census years
abline(v = separation_linez_2010)
## Rectangles
abline(v = out_dat$row_id, col = colz_vec[3], lwd = .65, lty = "dotted")
rect(xleft = out_dat$row_id-rect_width/2, xright = out_dat$row_id+rect_width/2, ybottom = out_dat$ci_lower, ytop = out_dat$ci_upper, col = out_dat$point_col, border = "black", lwd = rect_lwd)
segments(out_dat$row_id-rect_width/2, x1 = out_dat$row_id+rect_width/2, y0 = out_dat$tau, lwd = point_est_lwd)
## Horizontal axis
axis(1, line = 0, at = 1:nrow(out_dat), label = NA, tck = -0.01)
axis(1, line = 0, at = separation_linez_2010, label = NA, tck = -0.3)
## --- Dummy label on the horizontal axis
mtext(out_dat$dummy_label, side = 1, line = .5, at = out_dat$row_id, cex = 0.8, las = 2)
## --- DV label
dv_lbl = out_dat    %>% pull(FigLabel)  %>% unique
dv_lbl[1] = str_c("DV: ", dv_lbl[1])
mtext(dv_lbl, side = 3, line = 0, at = year_pos, las = 1, cex = 1)   ## census year
mtext(str_c("Year: 2010"), side = 3, line = 1.15, at = 0.2, las = 1, cex = 1.35, adj = 0)
## Vertical axis
for (j in c(2,4)) {
  axis(j, line = 0, at = y_tick, label = NA)
  axis(j, line = 0, at = y_tick_sub, label = NA, tck = -0.01)
  axis(j, line = 0, at = y_tick_subsub, label = NA, tck = -0.005)
}
mtext(y_tick, side = 2, line = 0.5, at = y_tick, cex = 0.8)
mtext("Raid Effect (95% CIs)", side = 2, line = 1.65, cex = 1)
## Zero-reference
abline(h = 0, col = adjustcolor(colz_vec[4], alpha = alpha_vec[3]))
box()
## Close and save
dev.off()
## ----------------------------------------------------------------------



## ----------------------------------------------------------------------
##
## Table A2
## Descriptive Statistics (2 of 2)
## See 2_NinkaRegression_MainTxt&AppxA for the descriptive stats for
## the neighborhood association outcome variables and covariates.
##
## ----------------------------------------------------------------------
## Prepare the object
## ----------------------------------------------------------------------
descriptive_tbl = tokyo2keep	%>%
  select(
    key_code, year, binary_damage_ratio,
    all_of(depvar_vec), negative_cntrlz) %>%
  filter(!is.na(binary_damage_ratio)) %>%
  pivot_wider(names_from = year, values_from = c(depvar_vec, negative_cntrlz))	%>%
  discard(~ all(is.na(.x)))	## Some columns only available in limited years
## ----------------------------------------------------------------------
## Table A.2: Descriptive Statistics
## ----------------------------------------------------------------------
tableContinuous(
  vars = descriptive_tbl	%>%	select(-key_code, -binary_damage_ratio)	%>%	as.data.frame(),
  group = descriptive_tbl$binary_damage_ratio,
  stats = c("n", "mean", "s", "median", "iqr"),
  prec = 3)
## ----------------------------------------------------------------------




## ----------------------------------------------------------------------
##
## Figure A4
## Result I: Socioeconomic results---5th poly
##
## ----------------------------------------------------------------------
## Setting before loop
## ----------------------------------------------------------------------
## Census year loop
vec_year = seq(1995, 2015, by = 5)	## Census years


## District vector: Unique (35) districts
vec_district = tokyo2keep$prewar_district  %>% unique

## initial row number for empty output matrix
r = 1

## create empty output matrix
tmp_mtrx = tmp_mtrx0

## ----------------------------------------------------------------------
## Loop over outcomes (indexed by i)
## ----------------------------------------------------------------------
for (i in seq_along(depvar_vec_combined)) {
  
  ## Set dependent variable and model specification
  tmp_depvar = depvar_vec_combined[i]
  tmp_mod = base_polynomial %>%	update(str_c(tmp_depvar, " ~ ."))	## Update DV
  
  ## Loop over census years
  for (y in seq_along(vec_year)) {
    
    ## select year
    tmp_year = vec_year[y]
    
    ## Prepare temp data
    tmp_data = tokyo_nested	%>%
      filter(group=="FullSample") %>%
      filter(
        year == tmp_year
      )	%>%
      unnest(cols = c(data))	%>%
      drop_na(all_of(tmp_depvar))	## unnest and drop NAs in DV
    
    ## enter information to the output matrix
    tmp_mtrx[r,1] = i						## Dependent variable indicator
    tmp_mtrx[r,2] = tmp_year			## Census year
    tmp_mtrx[r,3] = "FullSample" ## (Sub)sample indicator
    
    
    ## Do estimation only if the data exist in a given year
    if (nrow(tmp_data) > 0) {
      ## ----------------------------------------------------------------------
      ## Estimation
      tmp_mod_woFE = update(tmp_mod, .~. - as.factor(prewar_district))
      tmp_est = lm_robust(tmp_mod_woFE, data = tmp_data, weights = tmp_data$pop, fixed_effects = ~prewar_district)
      ## Record outputs
      tmp_info = model_info_robustlm(tmp_est, x2extract = "ratio_damage")
      tmp_mtrx[r,4:8] = c(tmp_info, nrow(tmp_data))	## beta, SE, and CIs + N obs
      ## update row number
      r = r+1
      
    } ## if ends here
  } ## year loop ends here
} ## outcome loop ends here


## format outputs for plot
out_dat = as.data.frame(tmp_mtrx) %>%
  ## set as numeric
  mutate_at(vars(-Sample), as.numeric) %>%
  as_tibble %>%
  mutate(
    DepVar = depvar_vec_combined[as.numeric(DepVar)],	## Dependent variable label
    Predictor = "ratio_damage"	## Add predictor (model specification)
  )	%>%
  drop_na()


## ----------------------------------------------------------------------
## Preparing figure
## ----------------------------------------------------------------------
## Pull out DV names
tmp_depvar_vec = out_dat$DepVar %>% unique

## Prepare estimates
tmp_plot_dat = out_dat	%>%
  mutate(
    point_col = ifelse(tau>0, adjustcolor(colz_vec[1], alpha = alpha_vec[2]), adjustcolor(colz_vec[2], alpha = alpha_vec[2])),
    point_col = ifelse(ci_lower>0 & ci_upper>0, adjustcolor(colz_vec[1], alpha = alpha_vec[1]), point_col),
    point_col = ifelse(ci_lower<0 & ci_upper<0, adjustcolor(colz_vec[2], alpha = alpha_vec[1]), point_col)
  ) %>%
  mutate(row_id = row_number())


## ----------------------------------------------------------------------
## Creating Plot
## ----------------------------------------------------------------------
tmp_figure_path = file.path(figure_dir, "fig_a4.pdf")
pdf(tmp_figure_path, width = plt_ratio(fig_height), height = fig_height)
par(cex = base_cex, mar = fig_mrgn, lend = "square")
x_range = c(0.25, (nrow(tmp_plot_dat)+0.75))
plot(x = tmp_plot_dat$row_id, y = tmp_plot_dat$tau, ylim = y_range, axes = FALSE, xlab = NA, ylab = NA, type = "n", yaxs = "i", xaxs = "i", xlim = x_range)

## Background for male ratio
rect(xleft = separation_linez[6], xright = 40, ytop = 1, ybottom = -1, col = adjustcolor("gray", alpha = 0.2))

## Separation lines between DVs
abline(v = separation_linez)
abline(v = 1:nrow(tmp_plot_dat), lwd = 0.35, lty = "dotted", col = colz_vec[3]) ## horizontal guides

## Rectangles
rect(xleft = tmp_plot_dat$row_id-rect_width/2, xright = tmp_plot_dat$row_id+rect_width/2, ybottom = tmp_plot_dat$ci_lower, ytop = tmp_plot_dat$ci_upper, col = tmp_plot_dat$point_col, border = "black", lwd = rect_lwd)
segments(x0 = tmp_plot_dat$row_id-rect_width/2, x1 = tmp_plot_dat$row_id+rect_width/2, y0 = tmp_plot_dat$tau, lwd = point_est_lwd)

## Horizontal axis
axis(1, line = 0, at = 1:nrow(tmp_plot_dat), label = NA, tck = -0.01)
axis(1, line = 0, at = separation_linez, label = NA, tck = -0.115)
mtext(tmp_plot_dat$year, side = 1, line = 0.5, at = 1:nrow(tmp_plot_dat), las = 2, cex = 0.85)
mtext(year_lbl, side = 1, line = 2.45, cex = 0.8)

## Vertical axis
for (j in c(2, 4)) {
  axis(j, line = 0, at = y_tick, label = NA)
  axis(j, line = 0, at = y_tick_sub, label = NA, tck = -0.01)
}
mtext(y_tick, side = 2, line = 0.5, at = y_tick, cex = 0.8)

## Showing outcome variables in rows.
mtext(outcome_lbl[outcome_pos_1], side = 3, line = 0, at = outcome_pos[outcome_pos_1], cex = 0.85)
mtext(outcome_lbl[outcome_pos_2], side = 3, line = 0.85, at = outcome_pos[outcome_pos_2], cex = 0.85)
mtext("Raid Effect (95% CIs)", side = 2, line = 1.65, cex = 0.8)

## Zero-reference
abline(h = 0, col = adjustcolor(colz_vec[4], alpha = alpha_vec[3]))
box()

## Close device
dev.off()
## ----------------------------------------------------------------------



## ----------------------------------------------------------------------
##
## Figure A5
## Result I: Socioeconomic results---GAM, binary_damage_ratio
##
## ----------------------------------------------------------------------
## Setting before loop
## ----------------------------------------------------------------------
## Census year loop
vec_year = seq(1995, 2015, by = 5)	## Census years

## District vector: Unique (35) districts
vec_district = tokyo2keep$prewar_district  %>% unique

## initial row number for empty output matrix
r = 1

## create empty output matrix
tmp_mtrx = tmp_mtrx0

## ----------------------------------------------------------------------
## Loop over outcomes (indexed by i)
## ----------------------------------------------------------------------
for (i in seq_along(depvar_vec_combined)) {
  
  ## Set dependent variable and model specification
  tmp_depvar = depvar_vec_combined[i]
  tmp_mod = base_gam %>% update(str_c(". ~ . - ratio_damage + binary_damage_ratio")) %>%
    update(str_c(tmp_depvar, " ~ ."))
  
  ## Loop over census years
  for (y in seq_along(vec_year)) {
    
    ## select year
    tmp_year = vec_year[y]
    
    ## Prepare temp data
    tmp_data = tokyo_nested	%>%
      filter(group=="FullSample") %>%
      filter(
        year == tmp_year
      )	%>%
      unnest(cols = c(data))	%>%
      drop_na(all_of(tmp_depvar))	## unnest and drop NAs in DV
    
    ## enter information to the output matrix
    tmp_mtrx[r,1] = i						## Dependent variable indicator
    tmp_mtrx[r,2] = tmp_year			## Census year
    tmp_mtrx[r,3] = "FullSample" ## (Sub)sample indicator
    
    
    ## Do estimation only if the data exist in a given year
    if (nrow(tmp_data) > 0) {
      ## ----------------------------------------------------------------------
      ## Estimation
      tmp_est = gam(tmp_mod, data = tmp_data, weights = tmp_data$pop)
      
      ## Record outputs
      tmp_info = model_info_gam(tmp_est, x2extract = "binary_damage_ratio")
      tmp_mtrx[r,4:8] = c(tmp_info, nrow(tmp_data))	## beta, SE, and CIs + N obs
      ## update row number
      r = r+1
      
    } ## if ends here
  } ## year loop ends here
} ## outcome loop ends here


## format outputs for plot
out_dat = as.data.frame(tmp_mtrx) %>%
  ## set as numeric
  mutate_at(vars(-Sample), as.numeric) %>%
  as_tibble %>%
  mutate(
    DepVar = depvar_vec_combined[as.numeric(DepVar)],	## Dependent variable label
    Predictor = "binary_damage_ratio"	## Add predictor (model specification)
  )	%>%
  drop_na()

## ----------------------------------------------------------------------
## Preparing figure
## ----------------------------------------------------------------------
## Pull out DV names
tmp_depvar_vec = out_dat$DepVar %>% unique

## Prepare estimates
tmp_plot_dat = out_dat	%>%
  mutate(
    point_col = ifelse(tau>0, adjustcolor(colz_vec[1], alpha = alpha_vec[2]), adjustcolor(colz_vec[2], alpha = alpha_vec[2])),
    point_col = ifelse(ci_lower>0 & ci_upper>0, adjustcolor(colz_vec[1], alpha = alpha_vec[1]), point_col),
    point_col = ifelse(ci_lower<0 & ci_upper<0, adjustcolor(colz_vec[2], alpha = alpha_vec[1]), point_col)
  ) %>%
  mutate(row_id = row_number())


## ----------------------------------------------------------------------
## Creating Plot
## ----------------------------------------------------------------------
tmp_figure_path = file.path(figure_dir, "fig_a5.pdf")
pdf(tmp_figure_path, width = plt_ratio(fig_height), height = fig_height)
# tmp_figure_path = file.path(tmp_dir, str_c("AllDepVars_6Fdropped_", logged_or_not[m], "_Sample_", tmp_sample_lbl, ".png"))
# png(tmp_figure_path, width = plt_ratio(fig_height), height = fig_height, res = 480, units = "in")
par(cex = base_cex, mar = fig_mrgn, lend = "square")
x_range = c(0.25, (nrow(tmp_plot_dat)+0.75))
plot(x = tmp_plot_dat$row_id, y = tmp_plot_dat$tau, ylim = y_range, axes = FALSE, xlab = NA, ylab = NA, type = "n", yaxs = "i", xaxs = "i", xlim = x_range)

## Background for male ratio
rect(xleft = separation_linez[6], xright = 40, ytop = 1, ybottom = -1, col = adjustcolor("gray", alpha = 0.2))

## Separation lines between DVs
abline(v = separation_linez)
abline(v = 1:nrow(tmp_plot_dat), lwd = 0.35, lty = "dotted", col = colz_vec[3]) ## horizontal guides

## Rectangles
rect(xleft = tmp_plot_dat$row_id-rect_width/2, xright = tmp_plot_dat$row_id+rect_width/2, ybottom = tmp_plot_dat$ci_lower, ytop = tmp_plot_dat$ci_upper, col = tmp_plot_dat$point_col, border = "black", lwd = rect_lwd)
segments(x0 = tmp_plot_dat$row_id-rect_width/2, x1 = tmp_plot_dat$row_id+rect_width/2, y0 = tmp_plot_dat$tau, lwd = point_est_lwd)

## Horizontal axis
axis(1, line = 0, at = 1:nrow(tmp_plot_dat), label = NA, tck = -0.01)
axis(1, line = 0, at = separation_linez, label = NA, tck = -0.115)
mtext(tmp_plot_dat$year, side = 1, line = 0.5, at = 1:nrow(tmp_plot_dat), las = 2, cex = 0.85)
mtext(year_lbl, side = 1, line = 2.45, cex = 0.8)

## Vertical axis
for (j in c(2, 4)) {
  axis(j, line = 0, at = y_tick, label = NA)
  axis(j, line = 0, at = y_tick_sub, label = NA, tck = -0.01)
}
mtext(y_tick, side = 2, line = 0.5, at = y_tick, cex = 0.8)

## Showing outcome variables in rows.
mtext(outcome_lbl[outcome_pos_1], side = 3, line = 0, at = outcome_pos[outcome_pos_1], cex = 0.85)
mtext(outcome_lbl[outcome_pos_2], side = 3, line = 0.85, at = outcome_pos[outcome_pos_2], cex = 0.85)
mtext("Raid Effect (95% CIs)", side = 2, line = 1.65, cex = 0.8)

## Zero-reference
abline(h = 0, col = adjustcolor(colz_vec[4], alpha = alpha_vec[3]))
box()

## Close device
dev.off()
## ----------------------------------------------------------------------



## ----------------------------------------------------------------------
##
## Figure A6
## Result I: Socioeconomic results---GAM, subsample >70% residential
##
## ----------------------------------------------------------------------
## Setting before loop
## ----------------------------------------------------------------------
## Census year loop
vec_year = seq(1995, 2015, by = 5)	## Census years

## District vector: Unique (35) districts
vec_district = tokyo2keep$prewar_district  %>% unique

## initial row number for empty output matrix
r = 1

## create empty output matrix
tmp_mtrx = tmp_mtrx0

## ----------------------------------------------------------------------
## Loop over outcomes (indexed by i)
## ----------------------------------------------------------------------
for (i in seq_along(depvar_vec_combined)) {
  
  ## Set dependent variable and model specification
  tmp_depvar = depvar_vec_combined[i]
  tmp_mod = base_gam %>%	update(str_c(tmp_depvar, " ~ ."))	## Update DV
  
  ## Loop over census years
  for (y in seq_along(vec_year)) {
    
    ## select year
    tmp_year = vec_year[y]
    
    ## Prepare temp data
    tmp_data = tokyo_nested	%>%
      filter(group=="Resid_(0.6,1]") %>%
      filter(
        year == tmp_year
      )	%>%
      unnest(cols = c(data))	%>%
      drop_na(all_of(tmp_depvar))	## unnest and drop NAs in DV
    
    ## enter information to the output matrix
    tmp_mtrx[r,1] = i						## Dependent variable indicator
    tmp_mtrx[r,2] = tmp_year			## Census year
    tmp_mtrx[r,3] = "FullSample" ## (Sub)sample indicator
    
    
    ## Do estimation only if the data exist in a given year
    if (nrow(tmp_data) > 0) {
      ## ----------------------------------------------------------------------
      ## Estimation
      tmp_est = gam(tmp_mod, data = tmp_data, weights = tmp_data$pop)
      
      ## Record outputs
      tmp_info = model_info_gam(tmp_est, x2extract = "ratio_damage")
      tmp_mtrx[r,4:8] = c(tmp_info, nrow(tmp_data))	## beta, SE, and CIs + N obs
      ## update row number
      r = r+1
      
    } ## if ends here
  } ## year loop ends here
} ## outcome loop ends here


## format outputs for plot
out_dat = as.data.frame(tmp_mtrx) %>%
  ## set as numeric
  mutate_at(vars(-Sample), as.numeric) %>%
  as_tibble %>%
  mutate(
    DepVar = depvar_vec_combined[as.numeric(DepVar)],	## Dependent variable label
    Predictor = "ratio_damage"	## Add predictor (model specification)
  )	%>%
  drop_na()


## ----------------------------------------------------------------------
## Preparing figure
## ----------------------------------------------------------------------
## Pull out DV names
tmp_depvar_vec = out_dat$DepVar %>% unique

## Prepare estimates
tmp_plot_dat = out_dat	%>%
  mutate(
    point_col = ifelse(tau>0, adjustcolor(colz_vec[1], alpha = alpha_vec[2]), adjustcolor(colz_vec[2], alpha = alpha_vec[2])),
    point_col = ifelse(ci_lower>0 & ci_upper>0, adjustcolor(colz_vec[1], alpha = alpha_vec[1]), point_col),
    point_col = ifelse(ci_lower<0 & ci_upper<0, adjustcolor(colz_vec[2], alpha = alpha_vec[1]), point_col)
  ) %>%
  mutate(row_id = row_number())


## ----------------------------------------------------------------------
## Creating Plot
## ----------------------------------------------------------------------
tmp_figure_path = file.path(figure_dir, "fig_a6.pdf")
pdf(tmp_figure_path, width = plt_ratio(fig_height), height = fig_height)
# tmp_figure_path = file.path(tmp_dir, str_c("AllDepVars_6Fdropped_", logged_or_not[m], "_Sample_", tmp_sample_lbl, ".png"))
# png(tmp_figure_path, width = plt_ratio(fig_height), height = fig_height, res = 480, units = "in")
par(cex = base_cex, mar = fig_mrgn, lend = "square")
x_range = c(0.25, (nrow(tmp_plot_dat)+0.75))
plot(x = tmp_plot_dat$row_id, y = tmp_plot_dat$tau, ylim = y_range, axes = FALSE, xlab = NA, ylab = NA, type = "n", yaxs = "i", xaxs = "i", xlim = x_range)

## Background for male ratio
rect(xleft = separation_linez[6], xright = 40, ytop = 1, ybottom = -1, col = adjustcolor("gray", alpha = 0.2))

## Separation lines between DVs
abline(v = separation_linez)
abline(v = 1:nrow(tmp_plot_dat), lwd = 0.35, lty = "dotted", col = colz_vec[3]) ## horizontal guides

## Rectangles
rect(xleft = tmp_plot_dat$row_id-rect_width/2, xright = tmp_plot_dat$row_id+rect_width/2, ybottom = tmp_plot_dat$ci_lower, ytop = tmp_plot_dat$ci_upper, col = tmp_plot_dat$point_col, border = "black", lwd = rect_lwd)
segments(x0 = tmp_plot_dat$row_id-rect_width/2, x1 = tmp_plot_dat$row_id+rect_width/2, y0 = tmp_plot_dat$tau, lwd = point_est_lwd)

## Horizontal axis
axis(1, line = 0, at = 1:nrow(tmp_plot_dat), label = NA, tck = -0.01)
axis(1, line = 0, at = separation_linez, label = NA, tck = -0.115)
mtext(tmp_plot_dat$year, side = 1, line = 0.5, at = 1:nrow(tmp_plot_dat), las = 2, cex = 0.85)
mtext(year_lbl, side = 1, line = 2.45, cex = 0.8)

## Vertical axis
for (j in c(2, 4)) {
  axis(j, line = 0, at = y_tick, label = NA)
  axis(j, line = 0, at = y_tick_sub, label = NA, tck = -0.01)
}
mtext(y_tick, side = 2, line = 0.5, at = y_tick, cex = 0.8)

## Showing outcome variables in rows.
mtext(outcome_lbl[outcome_pos_1], side = 3, line = 0, at = outcome_pos[outcome_pos_1], cex = 0.85)
mtext(outcome_lbl[outcome_pos_2], side = 3, line = 0.85, at = outcome_pos[outcome_pos_2], cex = 0.85)
mtext("Raid Effect (95% CIs)", side = 2, line = 1.65, cex = 0.8)

## Zero-reference
abline(h = 0, col = adjustcolor(colz_vec[4], alpha = alpha_vec[3]))
box()

## Close device
dev.off()
## ----------------------------------------------------------------------



## ----------------------------------------------------------------------
##
## Figure A7
## Result I: Socioeconomic results---GAM, within 10km radius
##
## ----------------------------------------------------------------------
## Setting before loop
## ----------------------------------------------------------------------
## Census year loop
vec_year = seq(1995, 2015, by = 5)	## Census years

## District vector: Unique (35) districts
vec_district = tokyo2keep$prewar_district  %>% unique

## initial row number for empty output matrix
r = 1

## create empty output matrix
tmp_mtrx = tmp_mtrx0

## ----------------------------------------------------------------------
## Loop over outcomes (indexed by i)
## ----------------------------------------------------------------------
for (i in seq_along(depvar_vec_combined)) {
  
  ## Set dependent variable and model specification
  tmp_depvar = depvar_vec_combined[i]
  tmp_mod = base_gam %>%	update(str_c(tmp_depvar, " ~ ."))	## Update DV
  
  ## Loop over census years
  for (y in seq_along(vec_year)) {
    
    ## select year
    tmp_year = vec_year[y]
    
    ## Prepare temp data
    tmp_data = tokyo_nested	%>%
      filter(group=="Dist_<10km") %>%
      filter(
        year == tmp_year
      )	%>%
      unnest(cols = c(data))	%>%
      drop_na(all_of(tmp_depvar))	## unnest and drop NAs in DV
    
    ## enter information to the output matrix
    tmp_mtrx[r,1] = i						## Dependent variable indicator
    tmp_mtrx[r,2] = tmp_year			## Census year
    tmp_mtrx[r,3] = "FullSample" ## (Sub)sample indicator
    
    
    ## Do estimation only if the data exist in a given year
    if (nrow(tmp_data) > 0) {
      ## ----------------------------------------------------------------------
      ## Estimation
      tmp_est = gam(tmp_mod, data = tmp_data, weights = tmp_data$pop)
      
      ## Record outputs
      tmp_info = model_info_gam(tmp_est, x2extract = "ratio_damage")
      tmp_mtrx[r,4:8] = c(tmp_info, nrow(tmp_data))	## beta, SE, and CIs + N obs
      ## update row number
      r = r+1
      
    } ## if ends here
  } ## year loop ends here
} ## outcome loop ends here


## format outputs for plot
out_dat = as.data.frame(tmp_mtrx) %>%
  ## set as numeric
  mutate_at(vars(-Sample), as.numeric) %>%
  as_tibble %>%
  mutate(
    DepVar = depvar_vec_combined[as.numeric(DepVar)],	## Dependent variable label
    Predictor = "ratio_damage"	## Add predictor (model specification)
  )	%>%
  drop_na()

## ----------------------------------------------------------------------
## Preparing figure
## ----------------------------------------------------------------------
## Pull out DV names
tmp_depvar_vec = out_dat$DepVar %>% unique

## Prepare estimates
tmp_plot_dat = out_dat	%>%
  mutate(
    point_col = ifelse(tau>0, adjustcolor(colz_vec[1], alpha = alpha_vec[2]), adjustcolor(colz_vec[2], alpha = alpha_vec[2])),
    point_col = ifelse(ci_lower>0 & ci_upper>0, adjustcolor(colz_vec[1], alpha = alpha_vec[1]), point_col),
    point_col = ifelse(ci_lower<0 & ci_upper<0, adjustcolor(colz_vec[2], alpha = alpha_vec[1]), point_col)
  ) %>%
  mutate(row_id = row_number())


## ----------------------------------------------------------------------
## Creating Plot
## ----------------------------------------------------------------------
tmp_figure_path = file.path(figure_dir, "fig_a7.pdf")
pdf(tmp_figure_path, width = plt_ratio(fig_height), height = fig_height)
# tmp_figure_path = file.path(tmp_dir, str_c("AllDepVars_6Fdropped_", logged_or_not[m], "_Sample_", tmp_sample_lbl, ".png"))
# png(tmp_figure_path, width = plt_ratio(fig_height), height = fig_height, res = 480, units = "in")
par(cex = base_cex, mar = fig_mrgn, lend = "square")
x_range = c(0.25, (nrow(tmp_plot_dat)+0.75))
plot(x = tmp_plot_dat$row_id, y = tmp_plot_dat$tau, ylim = y_range, axes = FALSE, xlab = NA, ylab = NA, type = "n", yaxs = "i", xaxs = "i", xlim = x_range)

## Background for male ratio
rect(xleft = separation_linez[6], xright = 40, ytop = 1, ybottom = -1, col = adjustcolor("gray", alpha = 0.2))

## Separation lines between DVs
abline(v = separation_linez)
abline(v = 1:nrow(tmp_plot_dat), lwd = 0.35, lty = "dotted", col = colz_vec[3]) ## horizontal guides

## Rectangles
rect(xleft = tmp_plot_dat$row_id-rect_width/2, xright = tmp_plot_dat$row_id+rect_width/2, ybottom = tmp_plot_dat$ci_lower, ytop = tmp_plot_dat$ci_upper, col = tmp_plot_dat$point_col, border = "black", lwd = rect_lwd)
segments(x0 = tmp_plot_dat$row_id-rect_width/2, x1 = tmp_plot_dat$row_id+rect_width/2, y0 = tmp_plot_dat$tau, lwd = point_est_lwd)

## Horizontal axis
axis(1, line = 0, at = 1:nrow(tmp_plot_dat), label = NA, tck = -0.01)
axis(1, line = 0, at = separation_linez, label = NA, tck = -0.115)
mtext(tmp_plot_dat$year, side = 1, line = 0.5, at = 1:nrow(tmp_plot_dat), las = 2, cex = 0.85)
mtext(year_lbl, side = 1, line = 2.45, cex = 0.8)

## Vertical axis
for (j in c(2, 4)) {
  axis(j, line = 0, at = y_tick, label = NA)
  axis(j, line = 0, at = y_tick_sub, label = NA, tck = -0.01)
}
mtext(y_tick, side = 2, line = 0.5, at = y_tick, cex = 0.8)

## Showing outcome variables in rows.
mtext(outcome_lbl[outcome_pos_1], side = 3, line = 0, at = outcome_pos[outcome_pos_1], cex = 0.85)
mtext(outcome_lbl[outcome_pos_2], side = 3, line = 0.85, at = outcome_pos[outcome_pos_2], cex = 0.85)
mtext("Raid Effect (95% CIs)", side = 2, line = 1.65, cex = 0.8)

## Zero-reference
abline(h = 0, col = adjustcolor(colz_vec[4], alpha = alpha_vec[3]))
box()

## Close device
dev.off()
## ----------------------------------------------------------------------




## ----------------------------------------------------------------------
##
## Figure A8: Socioeconomic results---GAM, ctrl 5th poly of prewar cov
##
## ----------------------------------------------------------------------
## Prepare variables
## ----------------------------------------------------------------------
## Prewar additional terms
prewarcov_plus_base = c("ratio_residential", "PopDensity_1939_km2_ln")

prewarcov_plus_poly = c(
  polynomial_varname(prewarcov_plus_base, degree = polynomial_degree),
  "I(ratio_residential*PopDensity_1939_km2_ln)", 
  "I(ratio_residential^2*PopDensity_1939_km2_ln)",
  "I(ratio_residential*PopDensity_1939_km2_ln^2)",
  "I(ratio_residential^2*PopDensity_1939_km2_ln^2)",
  "I(ratio_residential^3*PopDensity_1939_km2_ln)", 
  "I(ratio_residential*PopDensity_1939_km2_ln^3)",
  "I(ratio_residential^4*PopDensity_1939_km2_ln)", 
  "I(ratio_residential*PopDensity_1939_km2_ln^4)", 
  "I(ratio_residential^3*PopDensity_1939_km2_ln^2)", 
  "I(ratio_residential^2*PopDensity_1939_km2_ln^3)"
)

## ----------------------------------------------------------------------
## Prepare models
## ----------------------------------------------------------------------
## model = GAM
## add.feature = prewarcov_plus
base_gam_prewarcov_plus_poly = chr2fml("outcome",
                                       idv_list = list(
                                         "ratio_damage", 
                                         spatial_covariates, 
                                         district_fe,
                                         distance_term_specification[[2]], 
                                         spatial_term_specification[[2]],
                                         prewarcov_plus_poly
                                       ))

## ----------------------------------------------------------------------
## Prepare data obj
## ----------------------------------------------------------------------
## --- Adding newly defined covariates to the dataset
full_sample = tokyo2keep	%>%
  drop_na(ratio_damage, pop)	%>%
  mutate_at(vars(all_of(prewarcov_plus_base)), list(scale2)) %>%
  group_by(year)	%>%
  nest

## ----------------------------------------------------------------------
## Analysis
## ----------------------------------------------------------------------
## Setting before loop
## ----------------------------------------------------------------------
## Census year loop
vec_year = seq(1995, 2015, by = 5)	## Census years

## District vector: Unique (35) districts
vec_district = tokyo2keep$prewar_district  %>% unique

## initial row number for empty output matrix
r = 1

## create empty output matrix
tmp_mtrx = tmp_mtrx0

## ----------------------------------------------------------------------
## Loop over outcomes (indexed by i)
## ----------------------------------------------------------------------
for (i in seq_along(depvar_vec_combined)) {
  
  ## Set dependent variable and model specification
  tmp_depvar = depvar_vec_combined[i]
  # tmp_mod = base_gam %>%	update(str_c(tmp_depvar, " ~ ."))	## Update DV
  tmp_mod = base_gam_prewarcov_plus_poly %>%	update(str_c(tmp_depvar, " ~ ."))	## Update DV
  
  ## Loop over census years
  for (y in seq_along(vec_year)) {
    
    ## select year
    tmp_year = vec_year[y]
    
    ## Prepare temp data
    tmp_data = full_sample	%>%
      filter(
        year == tmp_year
      )	%>%
      unnest(cols = c(data))	%>%
      drop_na(all_of(tmp_depvar))	## unnest and drop NAs in DV
    
    ## enter information to the output matrix
    tmp_mtrx[r,1] = i						## Dependent variable indicator
    tmp_mtrx[r,2] = tmp_year			## Census year
    tmp_mtrx[r,3] = "FullSample" ## (Sub)sample indicator
    
    
    ## Do estimation only if the data exist in a given year
    if (nrow(tmp_data) > 0) {
      ## ----------------------------------------------------------------------
      ## Estimation
      tmp_est = gam(tmp_mod, data = tmp_data, weights = tmp_data$pop)
      
      ## Record outputs
      tmp_info = model_info_gam(tmp_est, x2extract = "ratio_damage")
      tmp_mtrx[r,4:8] = c(tmp_info, nrow(tmp_data))	## beta, SE, and CIs + N obs
      ## update row number
      r = r+1
      
    } ## if ends here
  } ## year loop ends here
} ## outcome loop ends here

## format outputs for plot
out_dat = as.data.frame(tmp_mtrx) %>%
  ## set as numeric
  mutate_at(vars(-Sample), as.numeric) %>%
  as_tibble %>%
  mutate(
    DepVar = depvar_vec_combined[as.numeric(DepVar)],	## Dependent variable label
    Predictor = "ratio_damage"	## Add predictor (model specification)
  )	%>%
  drop_na()


## ----------------------------------------------------------------------
## Plot estimates --- All DVs by year (annual subsamples)
## ----------------------------------------------------------------------
## Pull out DV names
tmp_depvar_vec = out_dat$DepVar %>% unique

## Prepare estimates
tmp_plot_dat = out_dat	%>%
  mutate(
    point_col = ifelse(tau>0, adjustcolor(colz_vec[1], alpha = alpha_vec[2]), adjustcolor(colz_vec[2], alpha = alpha_vec[2])),
    point_col = ifelse(ci_lower>0 & ci_upper>0, adjustcolor(colz_vec[1], alpha = alpha_vec[1]), point_col),
    point_col = ifelse(ci_lower<0 & ci_upper<0, adjustcolor(colz_vec[2], alpha = alpha_vec[1]), point_col)
  ) %>%
  mutate(row_id = row_number())


## Creating Plot
tmp_figure_path = file.path(figure_dir, "fig_a8.pdf")
pdf(tmp_figure_path, width = plt_ratio(fig_height), height = fig_height)
par(cex = base_cex, mar = fig_mrgn, lend = "square")
x_range = c(0.25, (nrow(tmp_plot_dat)+0.75))
plot(x = tmp_plot_dat$row_id, y = tmp_plot_dat$tau, ylim = y_range, axes = FALSE, xlab = NA, ylab = NA, type = "n", yaxs = "i", xaxs = "i", xlim = x_range)

## Background for male ratio
rect(xleft = separation_linez[6], xright = 40, ytop = 1, ybottom = -1, col = adjustcolor("gray", alpha = 0.2))

## Separation lines between DVs
abline(v = separation_linez)
abline(v = 1:nrow(tmp_plot_dat), lwd = 0.35, lty = "dotted", col = colz_vec[3]) ## horizontal guides

## Rectangles
rect(xleft = tmp_plot_dat$row_id-rect_width/2, xright = tmp_plot_dat$row_id+rect_width/2, ybottom = tmp_plot_dat$ci_lower, ytop = tmp_plot_dat$ci_upper, col = tmp_plot_dat$point_col, border = "black", lwd = rect_lwd)
segments(x0 = tmp_plot_dat$row_id-rect_width/2, x1 = tmp_plot_dat$row_id+rect_width/2, y0 = tmp_plot_dat$tau, lwd = point_est_lwd)

## Horizontal axis
axis(1, line = 0, at = 1:nrow(tmp_plot_dat), label = NA, tck = -0.01)
axis(1, line = 0, at = separation_linez, label = NA, tck = -0.115)
mtext(tmp_plot_dat$year, side = 1, line = 0.5, at = 1:nrow(tmp_plot_dat), las = 2, cex = 0.85)
mtext(year_lbl, side = 1, line = 2.45, cex = 0.8)

## Vertical axis
for (j in c(2, 4)) {
  axis(j, line = 0, at = y_tick, label = NA)
  axis(j, line = 0, at = y_tick_sub, label = NA, tck = -0.01)
}
mtext(y_tick, side = 2, line = 0.5, at = y_tick, cex = 0.8)

## Showing outcome variables in rows.
mtext(outcome_lbl[outcome_pos_1], side = 3, line = 0, at = outcome_pos[outcome_pos_1], cex = 0.85)
mtext(outcome_lbl[outcome_pos_2], side = 3, line = 0.85, at = outcome_pos[outcome_pos_2], cex = 0.85)
mtext("Raid Effect (95% CIs)", side = 2, line = 1.65, cex = 0.8)

## Zero-reference
abline(h = 0, col = adjustcolor(colz_vec[4], alpha = alpha_vec[3]))
box()

## Close device
dev.off()
## ----------------------------------------------------------------------




## ----------------------------------------------------------------------
##
## Figure A9: Socioeconomic results---GAM, unit zipcode
##
## ----------------------------------------------------------------------
## Data
census_zipcode_tbl = data_dir	%>%	file.path("census_zipcode_data.csv")	%>%	read_csv(col_types = cols())
## Outcomes
depvar_vec_tmp = c(depvar_vec, negative_cntrlz)
## Prepare list obj for baseline result table
baseline_2010_gam = list_along(depvar_vec_tmp)
names(baseline_2010_gam) = depvar_vec_tmp
baseline_2010_wls = baseline_2010_gam

## ----------------------------------------------------------------------
## Loop over spatial settings --- polynomials, spline, and distance variable spline
## Loop over outcomes (indexed by i)
for (i in seq_along(depvar_vec_tmp)) {
  ## Set dependent variable and model specification
  tmp_depvar = depvar_vec_tmp[i]
  tmp_mod = model_list[[2]]	%>%	update(str_c(tmp_depvar, " ~ ."))	## Update DV
  ## ----------------------------------------------------------------------
  ## Loop over census years
  for (y in seq_along(vec_year)) {
    ## Census year
    tmp_year = vec_year[y]
    ## Prepare output matrix: tau, SE, etc.
    tmp_mtrx = matrix(NA, nrow = 1, ncol = ncol_tmp_mtrx)
    tmp_mtrx[,1] = i						## Dependent variable indicator
    tmp_mtrx[,2] = tmp_year					## Census year
    ## Prepare output matrix: EDFs
    tmp_n_spline = str_count(as.character(tmp_mod)[3], "s\\(|te\\(")	%>%	as.numeric + 1	## +1 for the geo-coordinate spline
    tmp_edf_mtrx = matrix(NA, nrow = 1, ncol = tmp_n_spline)
    ## ----------------------------------------------------------------------
    ## Prepare subsample
    tmp_data = census_zipcode_tbl	%>%	filter(year == tmp_year)	%>%	drop_na(all_of(tmp_depvar))
    ## ----------------------------------------------------------------------
    ## Estimate and pull out coefficients and SEs
    if (nrow(tmp_data) > 0) {
      
      ## GAM
      tmp_est = gam(tmp_mod, data = tmp_data, weights = tmp_data$pop)
      tmp_info = model_info_gam(tmp_est, x2extract = treatment_vec[1])
      ## Extract the effective degrees of freedom
      tmp_edf = pen.edf(tmp_est)
      tmp_edf_mtrx[1,] = tmp_edf
      colnames(tmp_edf_mtrx) = names(tmp_edf)
      ## Store estimates
      tmp_mtrx[1,4:8] = c(tmp_info, nrow(tmp_data))	## beta, SE, and CIs + N obs
    }
    ## ----------------------------------------------------------------------
    ## Combine output matrix
    tmp_mtrx = cbind(tmp_mtrx, tmp_edf_mtrx)
    if (y == 1 & i == 1) {
      out_mtrx = tmp_mtrx
    }	else	{
      out_mtrx = rbind(out_mtrx, tmp_mtrx)
    }
  }	## year loop ends here
}	## outcome loop ends here
## ----------------------------------------------------------------------
## Modify outputs
colnames(out_mtrx)[1:ncol_tmp_mtrx] = c("DepVar", "year", "Sample", "tau", "se", "ci_lower", "ci_upper", "N_obs")
estimate_tbl = out_mtrx	%>%	as_tibble()	%>%
  mutate(
    Sample = "zipcode_polygon",
    DepVar = depvar_vec_tmp[DepVar],	## Dependent variable label
    Predictor = as.character(tmp_mod[3])	## Add predictor (model specification)
  )	%>%
  drop_na()
## ----------------------------------------------------------------------

## ----------------------------------------------------------------------
## Preparing Figure
## ----------------------------------------------------------------------
## Figure parameters
fig_height = 4.25
base_cex = .85
fig_mrgn = c(3.25, 2.4, 1.65, 1) + 0.1	## figure margin: bottom, left, top, right
y_range = c(-0.2, 0.2)
y_tick = seq(y_range[1], y_range[2], by = 0.05) %>% round(2)
y_tick_sub = seq(-1, 1, by = 0.01)	%>%	round(2)
separation_linez = c(5, 10, 15, 20, 23, 25) + 0.5
## ----------------------------------------------------------------------
## Outcome labels
outcome_pos = c(3, 8, 13, 18, 22, 24.5, 28)
outcome_pos_1 = c(1, 3, 5, 7)
outcome_pos_2 = c(2, 4, 6)
outcome_lbl_wo6f = outcome_lbl[!str_detect(outcome_lbl, "Apartment")]
## ----------------------------------------------------------------------
## Read estimates
csv_path_vec = estimate_tbl
## ----------------------------------------------------------------------
## Loop over models: WLS and GAM
## Pull out regression estimates
estimate_tbl = estimate_tbl	%>%
  mutate(
    row_id = row_number(),
    point_col = ifelse(tau > 0, adjustcolor(colz_vec[1], alpha = alpha_vec[2]), adjustcolor(colz_vec[2], alpha = alpha_vec[2])),
    point_col = ifelse(ci_lower > 0 & ci_upper > 0, adjustcolor(colz_vec[1], alpha = alpha_vec[1]), point_col),
    point_col = ifelse(ci_lower < 0 & ci_upper < 0, adjustcolor(colz_vec[2], alpha = alpha_vec[1]), point_col))
## ----------------------------------------------------------------------
## Creating Plot
## ----------------------------------------------------------------------
## Open device
pdf(
  figure_dir	%>%	file.path("fig_a9.pdf"),
  width = plt_ratio(fig_height), height = fig_height)
## Plot
par(cex = base_cex, mar = fig_mrgn, lend = "square")
x_range = c(0.25, (nrow(estimate_tbl) + 0.75))
plot(estimate_tbl$row_id, y = estimate_tbl$tau, ylim = y_range, axes = FALSE, xlab = NA, ylab = NA, type = "n", yaxs = "i", xaxs = "i", xlim = x_range)
## Background for the boy ratio
rect(xleft = separation_linez[6], xright = 40, ytop = 1, ybottom = -1, col = adjustcolor("gray", alpha = 0.2))
## Separation lines between DVs
abline(v = separation_linez)
abline(v = 1:nrow(estimate_tbl), lwd = 0.35, lty = "dotted", col = colz_vec[3]) ## horizontal guides
## Rectangles
rect(xleft = estimate_tbl$row_id - rect_width/2, xright = estimate_tbl$row_id + rect_width/2, ybottom = estimate_tbl$ci_lower, ytop = estimate_tbl$ci_upper, col = estimate_tbl$point_col, border = "black", lwd = rect_lwd)
segments(x0 = estimate_tbl$row_id - rect_width/2, x1 = estimate_tbl$row_id + rect_width/2, y0 = estimate_tbl$tau, lwd = point_est_lwd)
## Horizontal axis
axis(1, line = 0, at = 1:nrow(estimate_tbl), label = NA, tck = -0.01)
axis(1, line = 0, at = separation_linez, label = NA, tck = -0.115)
mtext(estimate_tbl$year, side = 1, line = 0.5, at = 1:nrow(estimate_tbl), las = 2, cex = 0.85)
mtext(year_lbl, side = 1, line = 2.45, cex = 0.8)
## Vertical axis
for (j in c(2, 4)) {
  axis(j, line = 0, at = y_tick, label = NA)
  axis(j, line = 0, at = y_tick_sub, label = NA, tck = -0.01)
}
mtext(y_tick, side = 2, line = 0.5, at = y_tick, cex = 0.8)
mtext(outcome_lbl_wo6f[outcome_pos_1], side = 3, line = 0, at = outcome_pos[outcome_pos_1], cex = 0.85)
mtext(outcome_lbl_wo6f[outcome_pos_2], side = 3, line = 0.85, at = outcome_pos[outcome_pos_2], cex = 0.85)
mtext("Raid Effect (95% CIs)", side = 2, line = 1.65, cex = 0.8)
## Zero-reference
abline(h = 0, col = adjustcolor(colz_vec[4], alpha = alpha_vec[3]))
box()
## Close device
dev.off()
## ----------------------------------------------------------------------




## ----------------------------------------------------------------------
##
## Table A3
## Result: Socioeconomic results---GAM 2010
##
## ----------------------------------------------------------------------
## Setting before loop
## ----------------------------------------------------------------------
## Prepare list obj for baseline result table
baseline_2010_gam = list_along(depvar_vec_combined)
names(baseline_2010_gam) = depvar_vec_combined

## Census year loop
vec_year = seq(1995, 2015, by = 5)	## Census years

## District vector: Unique (35) districts
vec_district = tokyo2keep$prewar_district  %>% unique

## initial row number for empty output matrix
r = 1

## ----------------------------------------------------------------------
## Loop over outcomes (indexed by i)
## ----------------------------------------------------------------------
for (i in seq_along(depvar_vec_combined)) {
  ## Set dependent variable and model specification
  tmp_depvar = depvar_vec_combined[i]
  tmp_mod = base_gam %>%	update(str_c(tmp_depvar, " ~ ."))	## Update DV
  
  ## select year
  tmp_year = 2010
  
  ## Prepare temp data
  tmp_data = tokyo_nested	%>%
    filter(group=="FullSample") %>%
    filter(
      year == tmp_year
    )	%>%
    unnest(cols = c(data))	%>%
    drop_na(all_of(tmp_depvar))	## unnest and drop NAs in DV
  
  ## Do estimation only if the data exist in a given year
  if (nrow(tmp_data) > 0) {
    ## ----------------------------------------------------------------------
    ## Estimation
    tmp_est = gam(tmp_mod, data = tmp_data, weights = tmp_data$pop)
    ## Store models 2010
    baseline_2010_gam[[i]] = tmp_est
    ## update row number
    r = r+1
    
  } ## if ends here
} ## outcome loop ends here

## ----------------------------------------------------------------------
## <<<----<<< Table: Baseline (2010 General Census) estimates >>>---->>>
## ----------------------------------------------------------------------
## Table for baseline regression estimates, 2010
gam_2010 = baseline_2010_gam
## Pull out DVs to be reported
gam_2010 = gam_2010[str_detect(names(gam_2010), "^ln")]		## Logged DVs
gam_2010 = gam_2010[!str_detect(names(gam_2010), "kyodojutaku6up")]	## Drop 6F+ ratio
names(gam_2010)
## Compute sample means (raw (non-logged) mean values)
trans_varz = c("lnr_unemp", "lnrm_unemp", "lnrf_unemp", "lnr_proexe")
mean_tbl = gam_2010	%>%
  ## Pull out data matrix from model objects
  map(extract_vec("model"))	%>%	map(as_tibble)	%>%
  ## Pull out DV vectors
  map(~.x %>% pull(1))	%>%
  ## Transform back to raw scale
  map(exp)	%>%
  enframe(name = "Outcome", value = "Value")	%>%
  group_by(Outcome)	%>%
  ## Transform values
  mutate(
    ValueRaw = if_else(
      Outcome %in% trans_varz,
      lapply(Value, function(x) {x = x -0.01}),
      Value)
  )	%>%
  ## Means and logged means
  mutate(
    Mean_Value = Value	%>%	map(mean),
    Mean_ValueRaw = ValueRaw	%>%	map(mean)
  )	%>%
  unnest(c(Mean_Value, Mean_ValueRaw))	%>%
  mutate(ln_Mean_ValueRaw = log(Mean_ValueRaw))
mean_tbl

## ----------------------------------------------------------------------
## Stargazer
stargazer(
  gam_2010,
  type = "text",
  keep = c("ratio_damage"),
  single.row = FALSE, digits = 4,
  align = TRUE, model.names = FALSE, model.numbers = FALSE,
  dep.var.labels.include = TRUE
)
## ----------------------------------------------------------------------




## ----------------------------------------------------------------------
##
## Table A8
## Result: Socioeconomic results---GAM, ctrl NE damage
##
## ----------------------------------------------------------------------
## Use object "gam_2010" from the above analysis.
## ----------------------------------------------------------------------
## Prepare vectors
key_code_vec = tokyo2keep	%>%	filter(year == 2010) %>%
  drop_na(ratio_damage) %>%
  pull(key_code) 
pop2010_vec = pop = tokyo2keep	%>%	filter(year == 2010) %>%
  drop_na(ratio_damage) %>%
  pull(pop)
## ----------------------------------------------------------------------
## Pull out residuals as a tibble object
DV_lblz = names(gam_2010)
gam_residualz_baseline = gam_2010	%>%	map(residuals, type = "response")	%>%
  enframe(name = "Outcome", value = "Residuals")	%>%
  unnest(cols = c(Residuals))	%>%
  ## Dummy ID for spread() function
  mutate(
    key_code = as.character(rep(key_code_vec, length(gam_2010))),
    population2010 = rep(pop2010_vec, length(gam_2010))
  )	%>%
  spread(key = Outcome, value = Residuals)	%>%
  select(key_code, population2010, all_of(DV_lblz))
## ----------------------------------------------------------------------
## Load and prepare polygon (moved above)
## --- Full polygons
full_poly = data_dir %>% 
  file.path("RaidShp_May2020b.rds") %>% 
  read_rds() %>%
  filter(row_id!=88) ## drop palace polygon
  
full_contig_nb = poly2nb(full_poly, queen = TRUE)
full_contig_listw = nb2listw(full_contig_nb, style = "W", zero.policy = TRUE)	## 1 neighborhood without neighbors
# lapply(full_contig_listw$weights, length)   %>% unlist  ## N neighbor cells
full_poly = full_poly	%>%
  ## Add spatially-lagged Damage
  mutate(
    lagged_damage = lag.listw(full_contig_listw, full_poly$ratio_damage, zero.policy = TRUE)
  )
## ----------------------------------------------------------------------
## --- Neighborhoods in the 2010 regressions
residual_poly = full_poly	%>%
  left_join(
    gam_residualz_baseline,
    by = "key_code"
  )	%>%
  drop_na(lnr_unemp)	## As in the 2010 regression data
## ----------------------------------------------------------------------

## ----------------------------------------------------------------------
## Models w/ spatially-lagged Damage
## ----------------------------------------------------------------------
## Prepare a list object
splag_mod_list = list_along(gam_2010)
names(splag_mod_list) = names(gam_2010)
## Loop over DVs
for(i in seq_along(gam_2010)) {
  ## Pull out and prepare data
  tmp_data = gam_2010[[i]]$model	%>%
    as_tibble()	%>%
    rename(
      prewar_district = 'as.factor(prewar_district)',
      population_weight = '(weights)'
    )	%>%
    mutate(
      ## This can be replaced with "tokyo_census_tbl" now
      lagged_damage = residual_poly$lagged_damage
    )
  ## Estimate
  splag_mod_list[[i]] = gam(update(gam_2010[[i]]$formula, .~. + lagged_damage), data = tmp_data, weights = population_weight)
}

## ----------------------------------------------------------------------
## tidy and skim the results
gam_2010	%>%	map(tidy, parametric = TRUE)	%>%
  enframe	%>%	unnest(cols = c(value))	%>%
  filter(str_detect(term, pattern = "damage"))
splag_mod_list	%>%	map(tidy, parametric = TRUE)	%>%
  enframe	%>%	unnest(cols = c(value))	%>%
  filter(str_detect(term, pattern = "ratio_damage"))
splag_mod_list	%>%	map(tidy, parametric = TRUE)	%>%
  enframe	%>%	unnest(cols = c(value))	%>%
  filter(str_detect(term, pattern = "lagged_damage"))
## ----------------------------------------------------------------------
## Stargazer
stargazer(
  splag_mod_list,
  type = "text",
  keep = c("ratio_damage"),
  single.row = FALSE, digits = 4,
  align = TRUE, model.names = FALSE, model.numbers = FALSE,
  dep.var.labels.include = TRUE
)
## ----------------------------------------------------------------------


## EOF

